# load packages
require(ggordiplots)
library(microViz)
library("vegan")
library(phyloseqCompanion)
library(metagMisc)

# load phyloseq data
load("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code/phyloseq final variables.RData")

# transform to equal sampling depth
set.seed(28132)
physeq.r = rarefy_even_depth(physeq)

# use proportions instead of counts
physeq.prop <- transform_sample_counts(physeq.r, function(x) x/sum(x))

# filter 
physeq_nobog <- subset_samples(physeq, Transect != "Bog")
physeq <- tax_glom(physeq, taxrank = 'Genus', NArm = F)

### NOTE: IF ANOSIM DOESN'T WORK, IT'S BECAUSE OF LOADING DADA2 PACKAGE, RESTART R AND ONLY LOAD ABOVE PACKAGES

############################################################
# beta diversity by side close to stream (1-6m) at Hansen
############################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq.prop, Stream=="Hansen")
physeq_hansen_close <- subset_samples(physeq_hansen, Distance < 7 & Distance > 1)

physeq.ord <- ordinate(physeq_hansen_close, "PCoA", "bray")
physeq.ord <- ordinate(physeq_hansen_close, "NMDS", "bray")

# run PCA ordination
p <- plot_ordination(physeq_hansen_close, physeq.ord, color = "Side")
p + theme_bw() + stat_ellipse(aes(group = Side)) ## add ellipses around each level
p <- p + theme_bw() + stat_chull(aes(group = Side)) + theme(axis.title.y = element_text(size = 22), axis.text.y = element_text(size = 22), 
    axis.text.x = element_text(size = 22), axis.title.x = element_text(size = 22), legend.text=element_text(size=22), legend.title=element_text(size=22)) +
  scale_color_manual(labels = c("Salmon-enhanced", "Salmon-depleted"), values = c("#F8766D", "#00BFC4")) 
## or add hulls around each level
# hulls works best with rarified and transformed data to proportions and NMDS

setwd("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing")
ggsave(plot=p, width=10, height=6, dpi=300, filename = "Fig3b.jpg")

# ANOSIM
transect_group = get_variable(physeq, "Side")
transect_ano = anosim(distance(physeq_hansen_close, "bray"), transect_group, permutations = 9999)
transect_ano$signif
transect_ano$statistic
# significantly different for 1-6 and 3-6 m
# stats are about the same for rariefied or transformed data vs. not

############################################################
# beta diversity by side at Hansen
############################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq, Stream=="Hansen")

physeq.ord <- ordinate(physeq_hansen, "PCoA", "bray")
physeq.ord <- ordinate(physeq_hansen, "NMDS", "bray")

# run PCA ordination
p <- plot_ordination(physeq_hansen, physeq.ord, color = "Side")
p + theme_bw() + stat_ellipse(aes(group = Side)) ## add ellipses around each level
p + theme_bw() + stat_chull(aes(group = Side)) ## or add hulls around each level

# ANOSIM
transect_group = get_variable(physeq_hansen, "Side")
transect_ano = anosim(distance(physeq_hansen, "bray"), transect_group, permutations = 9999)
transect_ano$signif
transect_ano$statistic

# not significantly different!

############################################################
# beta diversity by stream
############################################################

# PCOA and NMDS are different
# for ordinate, can do either a PCA(MDS) or NMDS; distance could be bray-curtis, 
# unweighted or weighted UniFrac, or jaccard
physeq.ord <- ordinate(physeq, method="PCoA", distance="bray")
physeq.ord <- ordinate(physeq, method="PCoA", distance="unifrac")
physeq.ord <- ordinate(physeq, method="PCoA", distance="wunifrac")
physeq.ord <- ordinate(physeq, method="PCoA", distance="jaccard")
physeq.ord <- ordinate(physeq, method='RDA')
physeq.ord <- ordinate(physeq, "NMDS", "bray")
physeq.ord <- ordinate(physeq, "PCoA", "bray")

# run PCA ordination
p <- plot_ordination(physeq, physeq.ord, color = "Stream")
#p <- plot_ordination(physeq, physeq.ord, type="samples", color="Stream", shape= "Stream", 
#                     title="Samples") + geom_point(size=3)
p <- p + theme_bw() + ggtitle("MDS + BC") ## add title and plain background
p + stat_chull(aes(group = Stream)) ## add hulls around each level
p + stat_ellipse(aes(group = Stream)) ## add ellipses around each level

# polygons
p + geom_polygon(aes(fill=Stream)) + geom_point(size=5) + ggtitle("samples")

# gg_ordiplot seems to only work with rda method or NMDS but not PCOA if using physeq object
# to bypass this, I created mds using direct species abundance matrix
gg_ordiplot(physeq.ord, groups=sample_data(physeq)$Stream, ellipse = F, hull=T)

# or could do manually by extracting abundances from physeq and running MDS
fungi.mds <- metaMDS(Species_abundance, distance = "bray", autotransform = FALSE)

# or another way
fungi.bray <- vegdist(Species_abundance, method = "bray")
ord <- cmdscale(fungi.bray, k = nrow(Species_abundance) - 1, eig = TRUE, add = TRUE)

# pull sample data
fungi_samples <- sample.data.frame(physeq)

# gg_ordiplot
gg_ordiplot(fungi.mds, groups=fungi_samples$Stream, ellipse = F, hull=T)
gg_ordiplot(ord, groups = fungi_samples$Stream, hull = T, spiders = F, ellipse = F)

# ordiplots
ordiplot1 <- ordihull(fungi.mds,groups=fungi_samples$Stream,draw="polygon",col="grey90",label=F)
orditorp(fungi.mds,groups=fungi_samples$Stream, display="sites",col="red",air=0.01)
ordihull(fungi.mds, fungi_samples$Stream, display = "sites", draw = c("polygon"),
         col = NULL, border = c("gray0", "gray20", "gray48"), lty = c(1, 2, 3),lwd = 2.5)

# ANOSIM
transect_group = get_variable(physeq, "Stream")
transect_ano = anosim(distance(physeq, "bray"), transect_group, permutations = 9999)
transect_ano$signif
transect_ano$statistic

############################################################
# beta diversity by transect at Hansen
############################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq, Stream=="Hansen")

physeq.ord <- ordinate(physeq_hansen, "PCoA", "bray")
physeq.ord <- ordinate(physeq_hansen, "NMDS", "bray")

# run PCA ordination
p <- plot_ordination(physeq_hansen, physeq.ord, color = "Transect")
p + theme_bw() + stat_ellipse(aes(group = Transect)) ## add ellipses around each level
p + theme_bw() + stat_chull(aes(group = Transect)) ## or add hulls around each level

# ANOSIM
transect_group = get_variable(physeq_hansen, "Transect")
transect_ano = anosim(distance(physeq_hansen, "bray"), transect_group, permutations = 9999)
transect_ano$signif
transect_ano$statistic

############################################################
# beta diversity by transect at Happy
############################################################

# filter phyloseq object by Happy stream
physeq_happy <- subset_samples(physeq, Stream=="Happy")

physeq.ord <- ordinate(physeq_happy, "PCoA", "bray")
physeq.ord <- ordinate(physeq_happy, "NMDS", "bray")

# run PCA ordination
p <- plot_ordination(physeq_happy, physeq.ord, color = "Transect")
p + theme_bw() + stat_ellipse(aes(group = Transect)) ## add ellipses around each level
p + theme_bw() + stat_chull(aes(group = Transect)) ## or add hulls around each level

# ANOSIM
transect_group = get_variable(physeq_happy, "Transect")
transect_ano = anosim(distance(physeq_happy, "bray"), transect_group, permutations = 9999)
transect_ano$signif
transect_ano$statistic

############################################################
# beta diversity by transect at Yako
############################################################

# filter phyloseq object by Yako stream
physeq_yako <- subset_samples(physeq, Stream=="Yako")

physeq.ord <- ordinate(physeq_yako, "PCoA", "bray")
physeq.ord <- ordinate(physeq_yako, "NMDS", "bray")

# run PCA ordination
p <- plot_ordination(physeq_yako, physeq.ord, color = "Transect")
p + theme_bw() + stat_ellipse(aes(group = Transect)) ## add ellipses around each level
p + theme_bw() + stat_chull(aes(group = Transect)) ## or add hulls around each level

# ANOSIM
transect_group = get_variable(physeq_yako, "Transect")
transect_ano = anosim(distance(physeq_yako, "bray"), transect_group, permutations = 9999)
transect_ano$signif
transect_ano$statistic

############################################################
# beta diversity by distance at all streams
############################################################

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq)
dist.euc = dist(fungi_samples$Distance, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa.all = as.vector(fungi.bray)
tt.all = as.vector(dist.euc)

#new data frame with vectorized distance matrices
mat = data.frame(aa.all,tt.all)

#abundance vs temperature
ggplot(mat, aes(y = aa.all, x = tt.all)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  #geom_smooth(method = "lm", alpha = .15) + 
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) 
  #geom_abline(intercept = 0.8693353661, slope = 0.00057)
   #scale_y_continuous(limits=c(0.9,1))

coef(lm(aa.all ~ tt.all, data = mat))
corr <- cor.test(x=mat$aa.all, y=mat$tt.all, method = 'spearman')
############################################################
# beta diversity by distance at Hansen
############################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq, Stream=="Hansen")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_hansen, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_hansen)
dist.euc = dist(fungi_samples$Distance, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa.hansen = as.vector(fungi.bray)
tt.hansen = as.vector(dist.euc)

#new data frame with vectorized distance matrices
mat.hansen = data.frame(aa.hansen,tt.hansen)

#abundance vs temperature
ggplot(mat.hansen, aes(y = aa.hansen, x = tt.hansen)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", alpha = .15) +
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) +ylim(0.7,1)
  #geom_smooth(method = "lm", alpha = .15) + scale_y_continuous(limits=c(0.8,1))

############################################################
# beta diversity by distance at Hansen LEFT side
############################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq, Stream=="Hansen")
physeq_hansen_left <- subset_samples(physeq_hansen, Side=="SL")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_hansen_left, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_hansen_left)
dist.euc = dist(fungi_samples$Distance, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa = as.vector(fungi.bray)
tt = as.vector(dist.euc)
name <- rep("Hansen Left",253)

#new data frame with vectorized distance matrices
mat.hansen.left = data.frame(aa, tt, name)

#abundance vs temperature
ggplot(mat.hansen.left, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", alpha = .15) + 
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) 
  #geom_smooth(method = "lm", alpha = .15) + scale_y_continuous(limits=c(0.9,1))

############################################################
# beta diversity by distance at Hansen RIGHT side
############################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq, Stream=="Hansen")
physeq_hansen_right <- subset_samples(physeq_hansen, Side=="SR")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_hansen_right, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_hansen_right)
dist.euc = dist(fungi_samples$Distance, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa = as.vector(fungi.bray)
tt = as.vector(dist.euc)
name <- rep("Hansen Right",253)

#new data frame with vectorized distance matrices
mat.hansen.right = data.frame(aa, tt, name)

#abundance vs temperature
ggplot(mat.hansen.right, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", alpha = .15) +
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) 
  #geom_smooth(method = "lm", alpha = .15) #+ scale_y_continuous(limits=c(0.85,1))
############################################################
# beta diversity by distance at Happy
############################################################

# filter phyloseq object by Happy stream
physeq_happy <- subset_samples(physeq, Stream=="Happy")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_happy, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_happy)
dist.euc = dist(fungi_samples$Distance, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa = as.vector(fungi.bray)
tt = as.vector(dist.euc)
name <- rep("Happy",325)

#new data frame with vectorized distance matrices
mat.happy = data.frame(aa, tt, name)

#abundance vs temperature
ggplot(mat.happy, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", se = FALSE) +
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black"))

############################################################
# beta diversity by distance at Yako
############################################################

# filter phyloseq object by Happy stream
physeq_yako <- subset_samples(physeq, Stream=="Yako")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_yako, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_yako)
dist.euc = dist(fungi_samples$Distance, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa = as.vector(fungi.bray)
tt = as.vector(dist.euc)
name <- rep("Yako",276)

#new data frame with vectorized distance matrices
mat.yako = data.frame(aa, tt, name)

#abundance vs temperature
ggplot(mat.yako, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", se = FALSE) +
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) + ylim(0.8,1)

# plot all together

# combine abundance dissimilarity vectors, euclidean distance vectors and labels together
all <- rbind(mat.hansen.left, mat.hansen.right, mat.happy, mat.yako)

ggplot(all, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", alpha = .15, aes(fill = name, color=name)) + 
  scale_y_continuous(limits=c(0.96,1))
